### calculate orientation order and related statistics
### for subsets of graphml cities from Boeing (2021)
### file started by NN 17 Feb 2022
### file updated by NN 7 July 2022

### this version: loop over the full 36 cities in the sample
### calculate entropy stats for each t community within each of them

#cd Dropbox/UMProjects_20172018/UrbanEnvironment/code/test_directory/python_scripts
#conda activate ox

import json
import multiprocessing as mp
import networkx as nx
import numpy as np
import os
import osmnx as ox
import pandas as pd
import random
import time
from scipy import stats



#### PART 0: define which city and which subset years you're running over
    #as you prepare more you just need to add it to here!
cities = ["abuja-2544", "abuja-2565", "accra-1910", "addis_ababa-5134",  "arusha-4800", "bamako-1553" , "beira-4521"  ,"brikama-1458" ,  "buchanan-1533" , "bujumbura-4078" ,  "cape_town-3268" , "gaborone-3587", "gombe-2932" , "ibadan-2189"  ,"johannesburg-3673" ,  "kampala-4427" , "kara-2026" , "khartoum-4335", "kigali-4172" , "kinshasa-3209", "lagos-2125" , "luanda-3050" , "lubumbashi-3756" , "malabo-2762" , "manzini-4080" , "maputo-4220",  "maseru-3631" , "monrovia-1526" , "nairobi-4808" , "nakuru-4729", "ndola-3859" , "oyo-2191" , "pointe_noire-2982" , "port_elizabeth-3505",  "porto_novo-2101",  "touba-1473" ,  "windhoek-3260"]
citygraphmls = ["abuja-2544.graphml", "abuja-2565.graphml", "accra-1910.graphml", "addis_ababa-5134.graphml",  "arusha-4800.graphml", "bamako-1553.graphml", "beira-4521.graphml", "brikama-1458.graphml", "buchanan-1533.graphml", "bujumbura-4078.graphml", "cape_town-3268.graphml", "gaborone-3587.graphml", "gombe-2932.graphml", "ibadan-2189.graphml", "johannesburg-3673.graphml", "kampala-4427.graphml", "kara-2026.graphml", "khartoum-4335.graphml", "kigali-4172.graphml", "kinshasa-3209.graphml", "lagos-2125.graphml", "luanda-3050.graphml", "lubumbashi-3756.graphml", "malabo-2762.graphml", "manzini-4080.graphml", "maputo-4220.graphml", "maseru-3631.graphml", "monrovia-1526.graphml", "nairobi-4808.graphml", "nakuru-4729.graphml", "ndola-3859.graphml", "oyo-2191.graphml", "pointe_noire-2982.graphml", "port_elizabeth-3505.graphml", "porto_novo-2101.graphml", "touba-1473.graphml", "windhoek-3260.graphml"  ]
numtcomslist = [33, 315, 565, 587, 79,  354,   28,   15,   17 ,  86, 1074,  103,   62,  421, 1636,  645 ,  13,  582 , 139 , 244 , 828 , 433 , 106 ,  54 ,  17,  236, 80,  162 , 319 ,  13,   59,   61  , 73 , 204 ,  90 ,  71,  103]
countries = ["nigeria-NGA_graphml", "nigeria-NGA_graphml", "ghana-GHA_graphml", "ethiopia-ETH_graphml",  "tanzania-TZA_graphml", "mali-MLI_graphml", "mozambique-MOZ_graphml", "gambia-GMB_graphml", "liberia-LBR_graphml", "burundi-BDI_graphml", "south_africa-ZAF_graphml", "botswana-BWA_graphml", "nigeria-NGA_graphml", "nigeria-NGA_graphml", "south_africa-ZAF_graphml", "uganda-UGA_graphml", "togo-TGO_graphml", "sudan-SDN_graphml", "rwanda-RWA_graphml", "democratic_republic_of_the_congo-COD_graphml", "nigeria-NGA_graphml", "angola-AGO_graphml", "democratic_republic_of_the_congo-COD_graphml", "equatorial_guinea-GNQ_graphml", "swaziland-SWZ_graphml", "mozambique-MOZ_graphml", "lesotho-LSO_graphml", "liberia-LBR_graphml", "kenya-KEN_graphml", "kenya-KEN_graphml", "zambia-ZMB_graphml", "nigeria-NGA_graphml", "republic_of_congo-COG_graphml", "south_africa-ZAF_graphml", "benin-BEN_graphml", "senegal-SEN_graphml", "namibia-NAM_graphml"  ]
outputfolder = "entropy_stats"

for x in range(0, len(citygraphmls)):
    city = cities[x]
    citygraphml = citygraphmls[x]
    numtcoms = numtcomslist[x]
    country = countries[x]
    print(city)
    tcomslabels = []
    for z in range(1, numtcoms+1):
        new = str(z)
        tcomslabels.append("_".join(["tcom", new]))

#### PART 1: Frontmatter and define the functions

    with open('config.json') as f:
        config = json.load(f)

    ox.config(log_file=True,
          logs_folder=config['osmnx_log_path'])

    graphml_folder = config['models_graphml_path'] #where to load graphml files
    indicators_street_path = config['indicators_street_path'] #where to save output street network indicators
    save_every_n = 1 #save results every n cities

    clean_int_tol = 10 #meters for intersection cleaning tolerance

    entropy_bins = 36
    min_entropy_bins = 4 #perfect grid
    perfect_grid = [1] * min_entropy_bins + [0] * (entropy_bins - min_entropy_bins)
    perfect_grid_entropy = stats.entropy(perfect_grid)

# bearing and entropy functions
    def reverse_bearing(x):
        return x + 180 if x < 180 else x - 180


    def get_unweighted_bearings(Gu, threshold):
    # calculate edge bearings
    # threshold lets you discard streets < some length from the bearings analysis
        b = pd.Series([d['bearing'] for u, v, k, d in Gu.edges(keys=True, data=True) if d['length'] > threshold])
        return pd.concat([b, b.map(reverse_bearing)]).reset_index(drop='True')


    def count_and_merge(n, bearings):
    # make twice as many bins as desired, then merge them in pairs
    # prevents bin-edge effects around common values like 0° and 90°
        n = n * 2
        bins = np.arange(n + 1) * 360 / n
        count, _ = np.histogram(bearings, bins=bins)

    # move the last bin to the front, so eg 0.01° and 359.99° will be binned together
        count = np.roll(count, 1)
        return count[::2] + count[1::2]


    def calculate_entropy(data, n):
        bin_counts = count_and_merge(n, data)
        entropy = stats.entropy(bin_counts)
        return entropy


    def calculate_orientation_entropy(Gu, threshold=10, entropy_bins=entropy_bins):
    # get edge bearings and calculate entropy of undirected network
        Gu = ox.add_edge_bearings(Gu)
        bearings = get_unweighted_bearings(Gu, threshold=threshold)
        entropy = calculate_entropy(bearings.dropna(), entropy_bins)
        return entropy

    def calculate_orientation_order(orientation_entropy, entropy_bins=entropy_bins, perfect_grid_entropy=perfect_grid_entropy):
        max_entropy = np.log(entropy_bins)
        orientation_order = 1 - ((orientation_entropy - perfect_grid_entropy) / (max_entropy - perfect_grid_entropy)) ** 2
        return orientation_order

# In[ ]:

    def calculate_circuity(Gu, length_total):

        coords = np.array([[Gu.nodes[u]['y'], Gu.nodes[u]['x'], Gu.nodes[v]['y'], Gu.nodes[v]['x']] for u, v, k in Gu.edges(keys=True)])
        df_coords = pd.DataFrame(coords, columns=['u_y', 'u_x', 'v_y', 'v_x'])

        distances = ox.distance.great_circle_vec(lat1=df_coords['u_y'],
                                             lng1=df_coords['u_x'],
                                             lat2=df_coords['v_y'],
                                             lng2=df_coords['v_x'])

        circuity_avg = length_total / distances.fillna(value=0).sum()
        return circuity_avg


    def intersection_counts(Gup, spn, tolerance=clean_int_tol):

        node_ids = set(Gup.nodes)
        intersect_count = len([1 for node, count in spn.items() if (count > 1) and (node in node_ids)])

        intersect_count_clean = len(ox.consolidate_intersections(Gup,
                                                             tolerance=tolerance,
                                                             rebuild_graph=False,
                                                             dead_ends=False))

        intersect_count_clean_topo = len(ox.consolidate_intersections(Gup,
                                                                  tolerance=tolerance,
                                                                  rebuild_graph=True,
                                                                  reconnect_edges=False,
                                                                  dead_ends=False))

        return intersect_count, intersect_count_clean, intersect_count_clean_topo


    def get_clustering(G):

    # get directed graph without parallel edges
        D = ox.get_digraph(G, weight="length")

    # average clustering coefficient for the directed graph ignoring parallel edges
        cc_avg_dir = nx.average_clustering(D)

    # average clustering coefficient (weighted) for the directed graph ignoring parallel edges
        cc_wt_avg_dir = nx.average_clustering(D, weight="length")

    # max pagerank (weighted) in directed graph ignoring parallel edges
        pagerank_max = max(nx.pagerank(D, weight="length").values())

    # get undirected graph without parallel edges
        U = nx.Graph(D)

    # average clustering coefficient for the undirected graph ignoring parallel edges
        cc_avg_undir = nx.average_clustering(U)

    # average clustering coefficient (weighted) for the undirected graph ignoring parallel edges
        cc_wt_avg_undir = nx.average_clustering(U, weight="length")

        return cc_avg_dir, cc_avg_undir, cc_wt_avg_dir, cc_wt_avg_undir, pagerank_max


# calculate elevation & grade stats
    def elevation_grades(Gu):

        grades = pd.Series(nx.get_edge_attributes(Gu, 'grade_abs'))
        elevs = pd.Series(nx.get_node_attributes(Gu, 'elevation'))
        elev_iqr = elevs.quantile(0.75) - elevs.quantile(0.25)
        elev_range = elevs.max() - elevs.min()

        return grades.mean(), grades.median(), elevs.mean(), elevs.median(), elevs.std(), elev_range, elev_iqr


# In[ ]:


    def save_results(indicators, output_path):

        output_folder = output_path[:output_path.rfind('/')]
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        df = pd.DataFrame(indicators).T.reset_index(drop=True)
        df.to_csv(output_path, index=False, encoding='utf-8')
        print(ox.ts(), f'saved {len(indicators)} results to disk at "{output_path}"')
        return df



    #open the main graphml file from Boeing
    filepath = os.path.join(graphml_folder, country, citygraphml)
    G1 = ox.load_graphml(filepath=filepath)

#start the loop to actually do everything here:
    for k in range(numtcoms):
        subsetnamenocsv = tcomslabels[k]
        subsetname=".".join([tcomslabels[k], "csv"])
        subsetname2="_".join([city, subsetname])
        #open a csv of the node ids you want to subset to (exported by R)
        from pandas import *
        filepath2 = os.path.join(indicators_street_path, "idlists", subsetname2)
        t = read_csv(filepath2)
        tlist = t['x'].tolist()

        #make a subgraph, G, of G1 that is subset to those node node_ids
        G2 = G1.subgraph(tlist)
        G3 = G2.to_undirected()
        #    print(ox.projection.is_projected(G3.graph["crs"]))
        ox.add_edge_bearings(G3, precision=1)

        ent = ox.bearing.orientation_entropy(G3, num_bins=36, min_length=0, weight=None)
        order = calculate_orientation_order(ent, entropy_bins=entropy_bins, perfect_grid_entropy=perfect_grid_entropy)
        n = len(G3.nodes)
        streets_per_node = nx.get_node_attributes(G3, 'street_count')
        prop_4way = list(streets_per_node.values()).count(4) / n
        prop_3way = list(streets_per_node.values()).count(3) / n
        prop_deadend = list(streets_per_node.values()).count(1) / n

        # elevation and grade
        grade_mean, grade_median, elev_mean, elev_median, elev_std, elev_range, elev_iqr = elevation_grades(G3)

        #circuity
        lengths = pd.Series(nx.get_edge_attributes(G3, 'length'))
        length_total = lengths.sum()
        circuity = calculate_circuity(G3, length_total)

        rslt = {
            'country'                    : country,
            'city'                       : citygraphml,
            'subset'                     : subsetnamenocsv,
            'orientation_entropy'        : ent,
            'orientation_order'          : order,
            'prop_4way'                  : prop_4way,
            'prop_3way'                  : prop_3way,
            'prop_deadend'               : prop_deadend,
            'node_count'                 : n,
            'elev_iqr'                   : elev_iqr,
            'elev_mean'                  : elev_mean,
            'elev_median'                : elev_median,
            'elev_range'                 : elev_range,
            'elev_std'                   : elev_std,
            'circuity'                   : circuity
                }

        filepathoutput = os.path.join(indicators_street_path, outputfolder, subsetname2)
        df = pd.DataFrame(list())
        df.to_csv(filepathoutput)
        import csv
        dict = rslt
        w = csv.writer(open(filepathoutput, "w"))
        for key, val in dict.items():
            w.writerow([key,val])
